Note you will have needed to run the download.sh script to get the data.
import os
import pathlib
# set plotly express to white theme
from dotenv import load_dotenv
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
%matplotlib inline
load_dotenv("../.env")
USE_POSTGRES = os.environ.get('DATABASE_USERNAME',False)
# USE_POSTGRES = False
root_data_dir = pathlib.Path("..").resolve() / "data"
ext_data_dir = root_data_dir / "external"
geopackage = root_data_dir / "aped.gpkg"
if USE_POSTGRES:
engine = create_engine(
f"postgresql+psycopg://{os.environ.get('DATABASE_USERNAME')}:{os.environ.get('DATABASE_PASSWORD')}@localhost:5432/{os.environ.get('DATABASE_NAME')}")
else:
import sqlite3
engine = sqlite3.connect(geopackage)
import pandas as pd
members = pd.read_sql("SELECT * from member_aph_47", engine)
members
| id | member_id | member | party | party_abbrev | district | is_senator | is_representative | mp_id | start | ... | RepresentedMinistries | RepresentedShadowMinistries | ParliamentaryPositions | Honours | Occupations | SecondaryOccupations | Qualifications | ElectorateService | PartyParliamentaryService | PartyCommitteeService | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 128 | Eric Abetz | Coalition | LNP | Tasmania | True | False | N26 | 1994-02-22 | ... | [Parliamentary Secretary, Special Minister of ... | [Shadow Minister, Shadow Minister, Shadow Mini... | [] | {} | [Barrister and solicitor] | [Barrister, Legal, Social and Welfare Professi... | [Bachelor of Arts, University of Tasmania, Bac... | [] | [{'RoSId': 26985, 'RoSType': 'Parliamentary Se... | [{'RoSId': 943, 'RoSType': 'Committee Service'... |
| 1 | 2 | 182 | Mary Doyle | Australian Labor Party | ALP | Aston | False | True | 299962 | 2023-04-01 | ... | [] | [] | [] | {} | [] | [] | [] | [] | [] | [] |
| 2 | 3 | 54 | Anthony Albanese | Australian Labor Party | ALP | Grayndler | False | True | R36 | 1996-03-02 | ... | [Cabinet Minister, Cabinet Minister, Cabinet M... | [Shadow Parliamentary Secretary, Shadow Parlia... | [] | {} | [Senior Policy Adviser to the Premier, the Hon... | [Credit or Loans Officer, Numerical Clerks, Cl... | [Bachelor of Economics, University of Sydney] | [{'Electorate': 'Grayndler', 'State': 'New Sou... | [{'RoSId': 27149, 'RoSType': 'Parliamentary Se... | [{'RoSId': 1127, 'RoSType': 'Committee Service... |
| 3 | 4 | 187 | John Alexander | Coalition | LNP | Bennelong | False | True | M3M | 2010-08-21 | ... | [] | [] | [] | {"Australian Sports Medal, 2000","Awarded a Me... | [Federation Cup Captain/Coach, 2005., Sports c... | [Tennis Coach, Sportspersons nec, Chief Execut... | [] | [{'Electorate': 'Bennelong', 'State': 'New Sou... | [{'RoSId': 27156, 'RoSType': 'Parliamentary Se... | [{'RoSId': 1133, 'RoSType': 'Committee Service... |
| 4 | 5 | 52 | Katie Allen | Coalition | LNP | Higgins | False | True | 282986 | 2019-05-18 | ... | [] | [] | [] | {"Fellow of the Australian Academy of Health a... | [Scientific Advisory Board Member at Before Br... | [Life Scientists nec, Research and Development... | [Doctor of Philosophy, University of Melbourne... | [{'Electorate': 'Higgins', 'State': 'Victoria'... | [{'RoSId': 54212, 'RoSType': 'Parliamentary Se... | [{'RoSId': 54504, 'RoSType': 'Committee Servic... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 278 | 279 | 205 | Jason Wood | Coalition | LNP | La Trobe | False | True | E0F | 2013-09-07 | ... | [Assistant Minister] | [Shadow Parliamentary Secretary, Shadow Parlia... | [] | {"Awarded a Centenary Medal, 01.01.2001","Quee... | [Senior Sergeant in the Counter Terrorism Coor... | [Police Officer, Detective, Protective Service... | [Graduate Diploma Innovation Service Managemen... | [{'Electorate': 'La Trobe', 'State': 'Victoria... | [{'RoSId': 27705, 'RoSType': 'Parliamentary Se... | [{'RoSId': 2472, 'RoSType': 'Committee Service... |
| 279 | 280 | 265 | Ken Wyatt | Coalition | LNP | Hasluck | False | True | M3A | 2010-08-21 | ... | [Assistant Minister, Assistant Minister, Assis... | [] | [] | {"Indigenous Education Warrior Award, World In... | [Director for Aboriginal Health at the Departm... | [Management Consultant, Policy and Planning Ma... | [Bachelor of Education, Churchlands College of... | [{'Electorate': 'Hasluck', 'State': 'Western A... | [{'RoSId': 27710, 'RoSType': 'Parliamentary Se... | [{'RoSId': 2479, 'RoSType': 'Committee Service... |
| 280 | 281 | 242 | Terry Young | Coalition | LNP | Longman | False | True | 201906 | 2019-05-18 | ... | [] | [] | [Member of the Speaker's Panel] | {} | [Franchisee, Drummond Golf, Maroochydore from ... | [Other Factory Process Workers nfd, Service St... | [] | [{'Electorate': 'Longman', 'State': 'Queenslan... | [{'RoSId': 54008, 'RoSType': 'Parliamentary Se... | [{'RoSId': 54534, 'RoSType': 'Committee Servic... |
| 281 | 282 | 230 | Tony Zappia | Australian Labor Party | ALP | Makin | False | True | HWB | 2007-11-24 | ... | [] | [Shadow Parliamentary Secretary, Shadow Parlia... | [] | {} | [Fitness centre manager from 1981 to 2007., Re... | [Bank Worker, Fitness Centre Manager, Numerica... | [] | [{'Electorate': 'Makin', 'State': 'South Austr... | [{'RoSId': 27719, 'RoSType': 'Parliamentary Se... | [{'RoSId': 2501, 'RoSType': 'Committee Service... |
| 282 | 283 | 63 | Trent Zimmerman | Coalition | LNP | North Sydney | False | True | 203092 | 2015-12-05 | ... | [] | [] | [Member of the Speaker's Panel] | {} | [Deputy Chief Executive Officer and Director o... | [Information and Organisation Professionals ne... | [] | [{'Electorate': 'North Sydney', 'State': 'New ... | [{'RoSId': 27723, 'RoSType': 'Parliamentary Se... | [{'RoSId': 2508, 'RoSType': 'Committee Service... |
283 rows × 62 columns
party_counts = members.groupby(["party", "Gender"]).size().reset_index(name='count')
party_counts["%"] = (party_counts["count"] / party_counts.groupby("party")["count"].transform("sum")) * 100
party_counts
| party | Gender | count | % | |
|---|---|---|---|---|
| 0 | Australian Greens | Female | 10 | 55.555556 |
| 1 | Australian Greens | Male | 8 | 44.444444 |
| 2 | Australian Labor Party | Female | 60 | 51.282051 |
| 3 | Australian Labor Party | Male | 57 | 48.717949 |
| 4 | Centre Alliance | Female | 1 | 50.000000 |
| 5 | Centre Alliance | Male | 1 | 50.000000 |
| 6 | Coalition | Female | 33 | 26.400000 |
| 7 | Coalition | Male | 92 | 73.600000 |
| 8 | Independent | Female | 9 | 69.230769 |
| 9 | Independent | Male | 4 | 30.769231 |
| 10 | Jacqui Lambie Network | Female | 2 | 100.000000 |
| 11 | Katter's Australian Party | Male | 1 | 100.000000 |
| 12 | Liberal Democratic Party | Female | 1 | 100.000000 |
| 13 | Pauline Hanson's One Nation | Female | 1 | 50.000000 |
| 14 | Pauline Hanson's One Nation | Male | 1 | 50.000000 |
| 15 | United Australia Party | Male | 1 | 100.000000 |
| 16 | United Australia Party [2018] | Male | 1 | 100.000000 |
# create a cut of members by age
from datetime import date
today = date.today()
members["age"] = pd.to_datetime(members["dob"]).apply(
lambda x: today.year - x.year - ((today.month, today.day) < (x.month, x.day)))
members["age_group"] = pd.cut(members["age"], bins=[18, 21, 30, 40, 50, 60, 70, 80, 90, 159],
labels=["18-20", "21-30", "31-40", "41-50", "51-60", "61-70", "71-80", "81-90", "91+"],
ordered=False)
age_counts = members.groupby(["Gender", "chamber", "age_group"]).size().reset_index(name='count')
age_counts
# create a plot of members by age_counts
import plotly.express as px
px.bar(age_counts, x="age_group", y="count", color="Gender", barmode="group", facet_col="chamber",
title="Age breakdown of Australian politicians by chamber")
# two female memebers missing 2 male members 4 male senators
chamber_counts = members.groupby(["Gender", "chamber"]).size().reset_index(name='count')
px.bar(age_counts, x="chamber", y="count", color="Gender", barmode="group", title="Gender breakdown by chamber")
# calculate members age using their date of birth grouped by party
from datetime import date
today = date.today()
members["age"] = pd.to_datetime(members["dob"]).apply(
lambda x: today.year - x.year - ((today.month, today.day) < (x.month, x.day)))
members.groupby(["party"]).agg({"age": ["mean", "std"]})
| age | ||
|---|---|---|
| mean | std | |
| party | ||
| Australian Greens | 50.333333 | 11.812257 |
| Australian Labor Party | 52.743590 | 9.897016 |
| Centre Alliance | 57.500000 | 10.606602 |
| Coalition | 52.983871 | 8.529227 |
| Independent | 51.692308 | 6.848170 |
| Jacqui Lambie Network | 52.000000 | 0.000000 |
| Katter's Australian Party | 77.000000 | NaN |
| Liberal Democratic Party | 55.000000 | NaN |
| Pauline Hanson's One Nation | 67.500000 | 0.707107 |
| United Australia Party | 39.000000 | NaN |
| United Australia Party [2018] | 59.000000 | NaN |
import plotly.express as px
fig = px.bar(party_counts, x="party", y="%", color="Gender",
title="Gender breakdown of Australian politicians by party")
fig.show()
members_education = pd.read_sql("""
SELECT * from member_secondary_school_education_47
""", engine)
members_education["al_school_sector"].value_counts()
al_school_sector Government 145 Independent 97 Catholic 67 Name: count, dtype: int64
school_sector_counts = members_education.drop_duplicates(["member", "school_sector"]).groupby(
["party", "school_sector"]).size().reset_index(name='count')
school_sector_counts
| party | school_sector | count | |
|---|---|---|---|
| 0 | Australian Greens | Non-government | 6 |
| 1 | Australian Greens | Public | 12 |
| 2 | Australian Labor Party | Non-government | 60 |
| 3 | Australian Labor Party | Public | 52 |
| 4 | Centre Alliance | Non-government | 2 |
| 5 | Centre Alliance | Public | 1 |
| 6 | Coalition | Non-government | 67 |
| 7 | Coalition | Public | 60 |
| 8 | Independent | Non-government | 9 |
| 9 | Independent | Public | 5 |
| 10 | Jacqui Lambie Network | Non-government | 1 |
| 11 | Jacqui Lambie Network | Public | 1 |
| 12 | Liberal Democratic Party | Non-government | 1 |
| 13 | Pauline Hanson's One Nation | Non-government | 1 |
| 14 | Pauline Hanson's One Nation | Public | 1 |
| 15 | United Australia Party | Non-government | 1 |
| 16 | United Australia Party [2018] | Non-government | 1 |
# create a plot of members school sector type by party
school_sector_counts = members_education.drop_duplicates(["member", "school_sector"]).groupby(
["school_sector", "party", ]).size().reset_index(name='count')
fig = px.bar(school_sector_counts, x="party", y="count", color="school_sector", barmode="group",
title="School sector breakdown of Australian politicians by party")
fig.show()
members
school_sector_counts = members_education.drop_duplicates(["member", "school_sector"]).groupby(
["party", "school_sector"]).size().reset_index(name='count')
fig = px.bar(school_sector_counts, x="school_sector", y="count", color="party",
title="School sector breakdown of Australian politicians by party")
fig.show()
Public is Government 64.4605631350012 Catholic 19.6750931104224 Independent 15.8643437545764
47th Parliament is Government 47% Catholic 20% Independent 33%
population_school_sector = {
"Catholic": 19.7,
"Government": 64.5,
"Independent": 15.8643437545764}
pop_school_sector = pd.DataFrame.from_dict(population_school_sector, orient="index").reset_index()
pop_school_sector["group"] = "Australian Population"
pop_school_sector.columns = ["school_sector", "%", "group"]
pop_school_sector
| school_sector | % | group | |
|---|---|---|---|
| 0 | Catholic | 19.700000 | Australian Population |
| 1 | Government | 64.500000 | Australian Population |
| 2 | Independent | 15.864344 | Australian Population |
# create a plot of members as a percentage school sector
school_sector_counts = members_education.drop_duplicates(["member", "al_school_sector"]).groupby(
["al_school_sector"]).size().reset_index(name='count')
school_sector_counts.rename(columns={"al_school_sector": "school_sector"}, inplace=True)
school_sector_counts["%"] = (school_sector_counts["count"] / school_sector_counts["count"].sum()) * 100
school_sector_counts["group"] = "Australian Politicians"
fig = px.bar(pd.concat([school_sector_counts, pop_school_sector]), x="school_sector", y="%",
color="group",
barmode="group", title="School sector breakdown of Australian politicians")
fig.show()
# create a plot of members as a percentage school sector depending on if is_representative or senate
school_sector_counts = members_education.drop_duplicates(["member", "al_school_sector"]).groupby(
["chamber", "al_school_sector"]).size().reset_index(name='count')
school_sector_counts["%"] = (school_sector_counts["count"] / school_sector_counts.groupby("chamber")[
"count"].transform("sum")) * 100
fig = px.bar(school_sector_counts, x="chamber", y="%", color="al_school_sector",
title="School sector breakdown of Australian politicians by party", labels={"chamber": "Chamber"})
fig.show()
# lets extract out our acara_financial data and see which school sectors are in our data
import geopandas as gpd
load_dotenv("../.env")
if USE_POSTGRES:
gdf = gpd.read_postgis(
'SELECT m.*, "total enrolments" as total_students FROM member_secondary_school_education_47 m LEFT JOIN acara_school_profile_2022 a on m.acara_id = a."acara sml id"::int',
engine, geom_col='geom')
else:
gdf_47 = gpd.read_file(geopackage, layer="member_secondary_school_education_47")
gdf_47 = gdf_47.convert_dtypes()
asp_47 = gpd.read_file(geopackage, layer="acara_school_profile_2022")[["acara sml id", "total enrolments"]]
asp_47 = asp_47.convert_dtypes()
gdf = pd.merge(gdf_47, asp_47, left_on="acara_id", right_on="acara sml id", how="left")
gdf.rename(columns={"total enrolments": "total_students"}, inplace=True)
gdf = gdf.convert_dtypes()
# fill missing students with min 50 students
gdf["total_students"] = gdf["total_students"].fillna('50').apply(lambda x: '50' if not x else x)
gdf["total_students"] = gdf["total_students"].astype(int)
# Group by party_abbrev and school_sector and avg total government funding
gdf["australian_government_recurrent_funding_per_student"] = gdf[
"australian_government_recurrent_funding_per_student"].fillna(0).astype(int)
gdf["state__territory_government_recurring_funding_per_student"] = gdf[
"state__territory_government_recurring_funding_per_student"].fillna(0).astype(int)
gdf["other_private_sources_per_student"] = gdf["other_private_sources_per_student"].fillna(0).astype(int)
gdf["total_gross_income_per_student"] = gdf["total_gross_income_per_student"].fillna(0).astype(int)
grouped = gdf.groupby(["party_abbrv", "school_sector"]).agg(
{"other_private_sources_per_student": "mean", "total_gross_income_per_student": "mean"}).reset_index()
px.bar(grouped, x="party_abbrv", y="other_private_sources_per_student", color="school_sector", barmode="group")
px.bar(grouped, x="party_abbrv", y="total_gross_income_per_student", color="school_sector", barmode="group")
# Select most common secondary school from members_secondary_school APH data
most_common_schools = pd.read_sql(
"SELECT secondaryschool,count(*) as members_attending FROM members_secondary_school WHERE secondaryschool != '' and secondaryschool is not null GROUP BY secondaryschool ORDER BY count(*) desc limit 10",
engine)
most_common_schools
| secondaryschool | members_attending | |
|---|---|---|
| 0 | Scotch College | 5 |
| 1 | St Ignatius College | 4 |
| 2 | St Peter's College | 4 |
| 3 | Wesley College | 4 |
| 4 | Marist College | 3 |
| 5 | Geelong Grammar School | 3 |
| 6 | Norwood High School | 2 |
| 7 | Aquinas College | 2 |
| 8 | Christian Brothers' College | 2 |
| 9 | Scotch College Melbourne | 2 |
# select those who attended the same school get value counts of name and show members who attended the same school
school_counts = members_education["name"].value_counts()
pd.merge(members_education, school_counts[school_counts > 1].reset_index(), left_on="name", right_on="name")[
["name", "member", "party", "school_sector", "count"]].sort_values(["name", "member"])
| name | member | party | school_sector | count | |
|---|---|---|---|---|---|
| 78 | Alfred Deakin High School | Andrew Laming | Coalition | Public | 2 |
| 77 | Alfred Deakin High School | Phillip Thompson | Coalition | Public | 2 |
| 47 | Anglican Church Grammar School | Glenn Sterle | Australian Labor Party | Non-government | 2 |
| 46 | Anglican Church Grammar School | Matt Thistlethwaite | Australian Labor Party | Non-government | 2 |
| 29 | Aquinas College | Celia Hammond | Coalition | Non-government | 2 |
| ... | ... | ... | ... | ... | ... |
| 33 | Wesley College, Victoria | Shayne Neumann | Australian Labor Party | Non-government | 2 |
| 8 | Woonona High School | Karen Grogan | Australian Labor Party | Public | 2 |
| 9 | Woonona High School | Mary Doyle | Australian Labor Party | Public | 2 |
| 20 | Xavier College | David Pocock | Independent | Non-government | 2 |
| 21 | Xavier College | Katie Allen | Coalition | Non-government | 2 |
83 rows × 5 columns
# select members who don't have a high school in education data
pd.read_sql(
"SELECT * FROM members WHERE id not in (SELECT member_id FROM member_education JOIN education e on member_education.education_id = e.id WHERE e.is_high_school = TRUE ) AND (high_school is null or high_school != 'International' ) ",
engine)
| id | orig_id | orig_table | member | district | is_senator | is_representative | graduated | start | wiki_link | dob | mp_id | party_id | chamber | high_school | preferred_name | aph_name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 86 | 3 | 47 | Alex Antic | South Australia | True | False | True | 2019-07-01 | http://www.wikidata.org/entity/Q63520981 | 1974-12-22 | 269375 | 42 | senate | Public | Alex Antic | Alexander Antic |
| 1 | 122 | 12 | 46 | Vince Connelly | Stirling | False | True | True | 2019-05-18 | http://www.wikidata.org/entity/Q64585316 | 1978-09-16 | 282984 | 42 | house | Non-government | Vince Connelly | Vincent Connelly |
| 2 | 90 | 106 | 47 | Karen Grogan | South Australia | True | False | True | 2021-09-21 | http://www.wikidata.org/entity/Q108617920 | 1960-01-01 | 296331 | 11 | senate | None | None | Karen Grogan |
| 3 | 243 | 21 | 46 | Nicolle Flint | Boothby | False | True | True | 2016-07-02 | http://www.wikidata.org/entity/Q25756083 | 1978-07-15 | 245550 | 42 | house | Non-government | None | Nicolle Flint |
| 4 | 75 | 151 | 47 | Michelle Ananda-Rajah | Higgins | False | True | True | 2022-05-21 | http://www.wikidata.org/entity/Q90398588 | 1972-12-10 | 290544 | 11 | house | None | None | Michelle Ananda-Rajah |
| 5 | 142 | 188 | 47 | Sam Birrell | Nicholls | False | True | True | 2022-05-21 | http://www.wikidata.org/entity/Q112131083 | 1975-01-01 | 288713 | 42 | house | None | None | Sam Birrell |
| 6 | 99 | 155 | 47 | Milton Dick | Oxley | False | True | True | 2016-07-02 | http://www.wikidata.org/entity/Q20983429 | 1972-07-21 | 53517 | 11 | house | Non-government | Milton Dick | Dugald Dick |
| 7 | 93 | 173 | 47 | Peter Khalil | Wills | False | True | True | 2016-07-02 | http://www.wikidata.org/entity/Q25756187 | 1973-03-23 | 101351 | 11 | house | Non-government | None | Peter Khalil |
| 8 | 251 | 182 | 47 | Rob Mitchell | McEwen | False | True | True | 2010-08-21 | http://www.wikidata.org/entity/Q7340383 | 1967-09-09 | M3E | 11 | house | Public | Rob Mitchell | Robert Mitchell |
| 9 | 152 | 123 | 47 | Llew O'Brien | Wide Bay | False | True | False | 2016-07-02 | http://www.wikidata.org/entity/Q25756234 | 1972-06-26 | 265991 | 42 | house | None | Llew O'Brien | Llewellyn O'Brien |
| 10 | 57 | 75 | 47 | Graham Perrett | Moreton | False | True | True | 2007-11-24 | http://www.wikidata.org/entity/Q5593142 | 1966-01-05 | HVP | 11 | house | Public | None | Graham Perrett |
| 11 | 23 | 200 | 47 | Stephen Bates | Brisbane | False | True | True | 2022-05-21 | http://www.wikidata.org/entity/Q112150148 | 1992-11-23 | 300246 | 12 | house | None | None | Stephen Bates |
| 12 | 175 | 219 | 47 | Tracey Roberts | Pearce | False | True | True | 2022-05-21 | http://www.wikidata.org/entity/Q109850050 | 1960-01-01 | 157125 | 11 | house | None | None | Tracey Roberts |
| 13 | 103 | 48 | 46 | Julian Simmonds | Ryan | False | True | True | 2019-05-18 | http://www.wikidata.org/entity/Q64226194 | 1985-08-29 | 282983 | 42 | house | Non-government | None | Julian Simmonds |
| 14 | 29 | 49 | 46 | Arthur Sinodinos | New South Wales | True | False | True | 2011-10-13 | http://www.wikidata.org/entity/Q4800293 | 1957-02-25 | BV7 | 42 | senate | Public | None | Arthur Sinodinos |
| 15 | 160 | 132 | 47 | Marielle Smith | South Australia | True | False | True | 2019-07-01 | http://www.wikidata.org/entity/Q63531157 | 1986-12-30 | 281603 | 11 | senate | Both | None | Marielle Smith |
| 16 | 207 | 98 | 47 | Jordon Steele-John | Western Australia | True | False | True | 2017-11-10 | http://www.wikidata.org/entity/Q33139188 | 1994-10-14 | 250156 | 12 | senate | Home-Schooled | None | Jordon Steele-John |
| 17 | 13 | 86 | 47 | Jana Stewart | Victoria | True | False | True | 2022-04-06 | http://www.wikidata.org/entity/Q111513180 | 1987-01-01 | 299352 | 11 | senate | None | None | Jana Stewart |
| 18 | 222 | 206 | 47 | Susan Templeman | Macquarie | False | True | True | 2016-07-02 | http://www.wikidata.org/entity/Q25753992 | 1963-07-30 | 181810 | 11 | house | Public | None | Susan Templeman |
| 19 | 67 | 94 | 47 | Jess Walsh | Victoria | True | False | True | 2019-07-01 | http://www.wikidata.org/entity/Q64711059 | 1971-05-16 | 252157 | 11 | senate | Non-government | None | Jess Walsh |
| 20 | 82 | 174 | 47 | Peter Whish-Wilson | Tasmania | True | False | True | 2012-06-20 | http://www.wikidata.org/entity/Q7177677 | 1968-02-24 | 195565 | 12 | senate | Both | None | Peter Whish-Wilson |
# Do percentage change in public to non-goverment school from parliamnent 46 to 47
ph_46 = pd.read_sql("SELECT * FROM member_secondary_school_education_46", engine)
ph_47 = pd.read_sql("SELECT * FROM member_secondary_school_education_47", engine)
school_sector_counts_46 = ph_46.drop_duplicates(["member", "school_sector"]).groupby(
["party", "school_sector"]).size().reset_index(name='count')
school_sector_counts_46["%"] = (school_sector_counts_46["count"] / school_sector_counts_46.groupby("party")[
"count"].transform("sum")) * 100
school_sector_counts_46["parliament"] = "46th Parliament"
school_sector_counts_47 = ph_47.drop_duplicates(["member", "school_sector"]).groupby(
["party", "school_sector"]).size().reset_index(name='count')
school_sector_counts_47["%"] = (school_sector_counts_47["count"] / school_sector_counts_47.groupby("party")[
"count"].transform("sum")) * 100
school_sector_counts_46["parliament"] = "46th Parliament"
school_sector_counts_47["parliament"] = "47th Parliament"
# calculate differences between school_sector_counts_46 and school_sector_counts
pd.concat([school_sector_counts_46, school_sector_counts_47]).sort_values(["party", "school_sector"])
| party | school_sector | count | % | parliament | |
|---|---|---|---|---|---|
| 0 | Australian Greens | Non-government | 9 | 75.000000 | 46th Parliament |
| 0 | Australian Greens | Non-government | 6 | 33.333333 | 47th Parliament |
| 1 | Australian Greens | Public | 3 | 25.000000 | 46th Parliament |
| 1 | Australian Greens | Public | 12 | 66.666667 | 47th Parliament |
| 2 | Australian Labor Party | Non-government | 48 | 49.484536 | 46th Parliament |
| 2 | Australian Labor Party | Non-government | 60 | 53.571429 | 47th Parliament |
| 3 | Australian Labor Party | Public | 49 | 50.515464 | 46th Parliament |
| 3 | Australian Labor Party | Public | 52 | 46.428571 | 47th Parliament |
| 4 | Centre Alliance | Non-government | 1 | 50.000000 | 46th Parliament |
| 4 | Centre Alliance | Non-government | 2 | 66.666667 | 47th Parliament |
| 5 | Centre Alliance | Public | 1 | 50.000000 | 46th Parliament |
| 5 | Centre Alliance | Public | 1 | 33.333333 | 47th Parliament |
| 6 | Coalition | Non-government | 63 | 56.250000 | 46th Parliament |
| 6 | Coalition | Non-government | 67 | 52.755906 | 47th Parliament |
| 7 | Coalition | Public | 49 | 43.750000 | 46th Parliament |
| 7 | Coalition | Public | 60 | 47.244094 | 47th Parliament |
| 8 | Independent | Non-government | 4 | 66.666667 | 46th Parliament |
| 8 | Independent | Non-government | 9 | 64.285714 | 47th Parliament |
| 9 | Independent | Public | 2 | 33.333333 | 46th Parliament |
| 9 | Independent | Public | 5 | 35.714286 | 47th Parliament |
| 10 | Jacqui Lambie Network | Non-government | 1 | 50.000000 | 47th Parliament |
| 10 | Jacqui Lambie Network | Public | 1 | 100.000000 | 46th Parliament |
| 11 | Jacqui Lambie Network | Public | 1 | 50.000000 | 47th Parliament |
| 11 | Katter's Australian Party | Public | 1 | 100.000000 | 46th Parliament |
| 12 | Liberal Democratic Party | Non-government | 1 | 100.000000 | 46th Parliament |
| 12 | Liberal Democratic Party | Non-government | 1 | 100.000000 | 47th Parliament |
| 13 | Pauline Hanson's One Nation | Non-government | 1 | 50.000000 | 47th Parliament |
| 13 | Pauline Hanson's One Nation | Public | 1 | 100.000000 | 46th Parliament |
| 14 | Pauline Hanson's One Nation | Public | 1 | 50.000000 | 47th Parliament |
| 15 | United Australia Party | Non-government | 1 | 100.000000 | 47th Parliament |
| 16 | United Australia Party [2018] | Non-government | 1 | 100.000000 | 47th Parliament |
school_sector_counts_prev = ph_46.drop_duplicates(["member", "al_school_sector"]).groupby(
["al_school_sector"]).size().reset_index(name='count')
school_sector_counts_prev.rename(columns={"al_school_sector": "school_sector"}, inplace=True)
school_sector_counts_prev["%"] = (school_sector_counts_prev["count"] / school_sector_counts_prev["count"].sum()) * 100
school_sector_counts_prev["group"] = "Australian Politicians 46th Parliament"
school_sector_counts = ph_47.drop_duplicates(["member", "al_school_sector"]).groupby(
["al_school_sector"]).size().reset_index(name='count')
school_sector_counts.rename(columns={"al_school_sector": "school_sector"}, inplace=True)
school_sector_counts["%"] = (school_sector_counts["count"] / school_sector_counts["count"].sum()) * 100
school_sector_counts["group"] = "Australian Politicians 47th Parliament"
fig = px.bar(pd.concat([school_sector_counts, school_sector_counts_prev, pop_school_sector]), x="school_sector", y="%",
color="group",
barmode="group", title="School sector breakdown of Australian politicians past 2 parliaments")
fig.show()
from pywaffle import Waffle
import matplotlib.pyplot as plt
# slightly different to SMH as they set those who did not graduate or who attended overseas to other
members_46 = pd.read_sql("SELECT * FROM member_aph_46", engine)
members_46_high_school = members_46.groupby("high_school").size().reset_index(name='count')
members_46_high_school["%"] = (members_46_high_school["count"] / members_46_high_school["count"].sum()) * 100
members_46_high_school
| high_school | count | % | |
|---|---|---|---|
| 0 | Both | 17 | 7.327586 |
| 1 | Home-Schooled | 1 | 0.431034 |
| 2 | International | 2 | 0.862069 |
| 3 | Non-government | 114 | 49.137931 |
| 4 | Public | 98 | 42.241379 |